/* Copyright 2012 Tobias Marschall
 * 
 * This file is part of CLEVER.
 * 
 * CLEVER is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CLEVER is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CLEVER.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <iostream>
#include <fstream>
#include <vector>
#include <cassert>
#include <ctime>
#include <iomanip>
#include <limits>

#include <boost/program_options.hpp>
#include <boost/unordered_set.hpp>
#include <boost/tokenizer.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/dynamic_bitset.hpp>
#include <boost/math/distributions.hpp>

#include <bamtools/api/BamReader.h>

#include "Variation.h"
#include "VariationUtils.h"
#include "VariationListParser.h"
#include "LengthAwareVariationIndex.h"
#include "FastaReader.h"
#include "Genotyper.h"
#include "GenotypeDistribution.h"
#include "BamHelper.h"
#include "HistogramBasedDistribution.h"
#include "VersionInfo.h"

using namespace std;
using namespace boost;
namespace po = boost::program_options;

void usage(const char* name, const po::options_description& options_desc) {
	cerr << "Usage: " << name << " [options] <dataset-list-file> <readgroup-file> <ref.fasta(.gz)> <deletion-list>" << endl;
	cerr << endl;
	cerr << "Reads recalibrated BAM files produced by LASER and determines the genotype of the" << endl;
	cerr << "deletions given in <deletion-list>, in the given order. Rank deletions by" << endl;
	cerr << "confidence for best results (high-confidence deletions first)." << endl;
	cerr << endl;
	cerr << "<dataset-list-file> is a three column file:" << endl;
	cerr << "    <sample-name> <bam-file> <role>" << endl;
	cerr << "where <role> can be any of none/mother/father/child/monozygotic_twin/dizygotic_twin" << endl;
	cerr << endl;
	cerr << "<readgroup-file> a three column file:" << endl;
	cerr << "    <sample-name> <readgroup-name> <insert-size-dist-file>" << endl;
	cerr << endl;
	cerr << options_desc << endl;
	exit(1);
}

typedef boost::unordered_map<string,int> dataset_name_map_t;

typedef enum { NONE, MOTHER, FATHER, SINGLE_CHILD, MONOZYGOTIC_TWIN, DIZYGOTIC_TWIN } role_t;
typedef enum { NO_TWINS, MONOZYGOTIC, DIZYGOTIC } twin_type_t;

typedef struct dataset_t {
	string name;
	role_t role;
	BamTools::BamReader* bam_reader;
	Genotyper* genotyper;
	Genotyper::read_name_set_t used_reads;
	Genotyper::readgroup_params_map_t readgroup_params;
	dataset_t(string name, string bam_filename, string role) : name(name), role(NONE), bam_reader(0), genotyper(0) {
		bam_reader = new BamTools::BamReader();
		if ((bam_filename.size() < 4) || (bam_filename.substr(bam_filename.size()-4,4).compare(".bam") != 0)) {
			ostringstream oss;
			oss << "Filename doesn't end on \".bam\": \"" << bam_filename << "\".";
			throw std::runtime_error(oss.str());
		}
		if (!bam_reader->Open(bam_filename)) {
			ostringstream oss;
			oss << "Could not read BAM input from \"" << bam_filename << "\".";
			throw std::runtime_error(oss.str());
		}
		if (!bam_reader->OpenIndex(bam_filename + ".bai")) {
			string index_filename(bam_filename);
			index_filename.replace(bam_filename.size()-3,3,"bai");
			if (!bam_reader->OpenIndex(index_filename)) {
				ostringstream oss;
				oss << "Could not locate index for BAM file \"" << bam_filename << "\".";
				throw std::runtime_error(oss.str());
			}
		}
		if (role.compare("none") == 0) {
			this->role = NONE;
		} else if (role.compare("mother") == 0) {
			this->role = MOTHER;
		} else if (role.compare("father") == 0) {
			this->role = FATHER;
		} else if (role.compare("child") == 0) {
			this->role = SINGLE_CHILD;
		} else if (role.compare("monozygotic_twin") == 0) {
			this->role = MONOZYGOTIC_TWIN;
		} else if (role.compare("dizygotic_twin") == 0) {
			this->role = DIZYGOTIC_TWIN;
		} else {
			ostringstream oss;
			oss << "Dataset \"" << name << "\": invalid role \"" << role << "\".";
			throw std::runtime_error(oss.str());
		}
	}
	void set_readgroup_params(const string& readgroup, const Genotyper::mean_and_stddev_t& params) {
		readgroup_params[readgroup] = params;
	}
} dataset_t;

// information that is output per combination of variation and data set
typedef struct output_stats_t {
	GenotypeDistribution::genotype_t genotype;
	Genotyper::variation_stats_t genotyper_stats;
	// genotype distribution without use of priors or family information
	GenotypeDistribution raw_genotype_distribution;
	// posteriors genotype distribution after use of priors and family information
	GenotypeDistribution genotype_distribution;
	output_stats_t() : genotype(GenotypeDistribution::ABSENT) {}
} output_stats_t;

ostream& operator<<(ostream& os, const dataset_t& d) {
	os << d.name << " " << d.role;
	Genotyper::readgroup_params_map_t::const_iterator it = d.readgroup_params.begin();
	for (; it != d.readgroup_params.end(); ++it) {
		os << " " << it->first << ":" << it->second.mean << "," << it->second.stddev;
	}
	return os;
}

void read_dataset_list(const string& filename, vector<dataset_t>* result) {
	assert(result != 0);
	ifstream is(filename.c_str());
	if (is.fail()) {
		cerr << "Error: could not open \"" << filename << "\"." << endl;
		exit(1);
	}
	typedef boost::tokenizer<boost::char_separator<char> > tokenizer_t;
	boost::char_separator<char> whitespace_separator(" \t");
	string line;
	int linenr = 0;
	while (getline(is,line)) {
		linenr += 1;
		tokenizer_t tokenizer(line, whitespace_separator);
		vector<string> tokens(tokenizer.begin(), tokenizer.end());
		if (tokens.size() != 3) {
			ostringstream oss;
			oss << "Error parsing data set list. Offending line: " << linenr;
			throw std::runtime_error(oss.str());
		}
		result->push_back(dataset_t(tokens[0], tokens[1], tokens[2]));
		int i = result->size() - 1;
		cerr << "Data set " << i << ": " << result->at(i) << endl;
	}
}

typedef struct family_information_t {
	/** Is family information present at all? */
	bool present;
	int mother_idx;
	int father_idx;
	int child0_idx;
	int child1_idx;
	twin_type_t twin_type;
	family_information_t() : present(false), mother_idx(-1), father_idx(-1), child0_idx(-1), child1_idx(-1), twin_type(NO_TWINS) {}
	family_information_t(const vector<dataset_t>& datasets) : present(false), mother_idx(-1), father_idx(-1), child0_idx(-1), child1_idx(-1), twin_type(NO_TWINS) {
		for (size_t i=0; i<datasets.size(); ++i) {
			switch (datasets[i].role) {
				case NONE: break;
				case MOTHER:
					present = true;
					mother_idx = i;
					break;
				case FATHER:
					present = true;
					father_idx = i;
					break;
				case SINGLE_CHILD:
					present = true;
					child0_idx = i;
					break;
				case MONOZYGOTIC_TWIN:
					present = true;
					if (child0_idx == -1) child0_idx = i;
					else child1_idx = i;
					if (twin_type == DIZYGOTIC) throw std::runtime_error("Inconsistent twin information: must be either monozygotic or dizygotic.");
					twin_type = MONOZYGOTIC;
					break;
				case DIZYGOTIC_TWIN:
					present = true;
					if (child0_idx == -1) child0_idx = i;
					else child1_idx = i;
					if (twin_type == MONOZYGOTIC) throw std::runtime_error("Inconsistent twin information: must be either monozygotic or dizygotic.");
					twin_type = DIZYGOTIC;
					break;
				default:
					assert(false);
			}
		}
		if (present) {
			if (datasets.size() == 3) {
				if ((mother_idx == -1) || (father_idx == -1) || (child0_idx == -1) || (child1_idx != -1) || (twin_type != NO_TWINS)) {
					throw std::runtime_error("Inconsistent trio information: must have exactly one father, mother, and child.");
				}
			} else if (datasets.size() == 4) {
				if ((mother_idx == -1) || (father_idx == -1) || (child0_idx == -1) || (child1_idx == -1) || (twin_type == NO_TWINS)) {
					throw std::runtime_error("Inconsistent quartet information: must have exactly one father, mother, and two twin children.");
				}
			} else {
				throw std::runtime_error("To use family-aware genotyping (i.e. setting <role> to values other than none), there must be exactly three (trio) or four (quartet) datasets.");
			}
		}
	}
} family_information_t;

ostream& operator<<(ostream& os, const family_information_t& fi) {
	os << (fi.present?"family":"unrelated") << ", mother:" << fi.mother_idx << ", father:" << fi.father_idx << ", child0:" << fi.child0_idx << ", child1:" << fi.child1_idx << ", twin_type: " << fi.twin_type;
	return os;
}

void read_readgroup_list(const string& filename, vector<dataset_t>* datasets, const dataset_name_map_t& dataset_name_map) {
	assert(datasets != 0);
	ifstream is(filename.c_str());
	if (is.fail()) {
		cerr << "Error: could not open \"" << filename << "\"." << endl;
		exit(1);
	}
	typedef boost::tokenizer<boost::char_separator<char> > tokenizer_t;
	boost::char_separator<char> whitespace_separator(" \t");
	string line;
	int linenr = 0;
	while (getline(is,line)) {
		linenr += 1;
		tokenizer_t tokenizer(line, whitespace_separator);
		vector<string> tokens(tokenizer.begin(), tokenizer.end());
		if (tokens.size() != 3) {
			ostringstream oss;
			oss << "Error parsing read group list. Offending line: " << linenr;
			throw std::runtime_error(oss.str());
		}
		dataset_name_map_t::const_iterator it = dataset_name_map.find(tokens[0]);
		if (it == dataset_name_map.end()) {
			ostringstream oss;
			oss << "Error parsing read group list. Dataset \"" << tokens[0] << "\" unknown in line " << linenr << ".";
			throw std::runtime_error(oss.str());
		}
		assert(it->second < datasets->size());
		dataset_t& d = datasets->at(it->second);
		HistogramBasedDistribution distribution(tokens[2]);
		Genotyper::mean_and_stddev_t params;
		distribution.estimateGaussian(&params.mean, &params.stddev);
		d.set_readgroup_params(tokens[1], params);
	}
}

string get_date_string() {
	time_t t = time(0);
	struct tm* now = localtime(&t);
	ostringstream oss;
	oss << (now->tm_year+1900) << setfill('0') << setw(2) << (now->tm_mon+1) << setw(2) << now->tm_mday;
	return string(oss.str());
}

/*
void test_compute_genotype(int supporting_reads, int total_reads, double fp_prob, double fn_prob) {
	cerr << "TEST: " << supporting_reads << "/" << total_reads << ", p_FP:" << fp_prob << ", p_FN:" << fn_prob << endl;
	// compute probability of no variant, i.e. that all reads indicate that there is no variant,
	// i.e. all supporting reads are false positives and the rest are true negatives
	double no_variant_prob = boost::math::pdf(boost::math::binomial_distribution<>(total_reads, fp_prob), supporting_reads);
	// compute probability of a homozyguous variant
	double homozyguous_prob = boost::math::pdf(boost::math::binomial_distribution<>(total_reads, 1.0-fn_prob), supporting_reads);
	// compute probability of a heterozyguous variant
	double hetero_prob = boost::math::pdf(boost::math::binomial_distribution<>(total_reads, 0.5*fp_prob + 0.5*(1.0-fn_prob)), supporting_reads);
	double p_sum = no_variant_prob + homozyguous_prob + hetero_prob;
// 	cerr << "no_variant_prob: " << no_variant_prob << ", hetero_prob: " << hetero_prob << ", homozyguous_prob: " << homozyguous_prob << endl;
	cerr << "  --> ABSENT: " << (no_variant_prob/p_sum) << ", HET:" <<  (hetero_prob/p_sum) << ", HOMO:" << (homozyguous_prob/p_sum) << endl;
}
*/

int phred(double p) {
	return (int)round(min(255.0,-log10(p)*10.0));
}

typedef struct variation_comparator_t{
	const boost::unordered_map<Variation,int>& support_counts;
	variation_comparator_t(const boost::unordered_map<Variation,int>& support_counts) : support_counts(support_counts) {}
	bool operator()(const Variation& v1, const Variation& v2) {
		boost::unordered_map<Variation,int>::const_iterator v1_it = support_counts.find(v1);
		boost::unordered_map<Variation,int>::const_iterator v2_it = support_counts.find(v2);
		assert(v1_it != support_counts.end());
		assert(v2_it != support_counts.end());
		return v1_it->second > v2_it->second;
	}
} variation_comparator_t;

int main(int argc, char* argv[]) {
	VersionInfo::checkAndPrintVersion("genotyper", cerr);
	string commandline = VersionInfo::commandline(argc, argv);

	// PARAMETERS
//	int max_distance_split_clever;
//	int max_length_diff_split_clever;
	int max_distance_split_split;
	int max_length_diff_split_split;
//	int min_length;
//	double internal_segment_length_mean;
//	double internal_segment_length_stddev;
	int bam_window_size;
//	bool genotyping = false;
//	bool trio_aware = false;
//	bool split_reads_from_bam = false;
	int min_mapq;
	double denovo_threshold;
	double variant_prior;
	bool genotyping_only_inserts = false;
	bool genotyping_only_splits = false;
	int min_gq;
	int min_phys_cov;
	int gq_present_threshold;
	bool omit_absent = false;
	bool dont_prioritize = false;
	int split_read_border_size;
	int max_del_output_length;
	
	po::options_description options_desc("Allowed options");
	options_desc.add_options()
		("max_offset_split,O", po::value<int>(&max_distance_split_split)->default_value(10), "Maximum allowed distance between two split read calls.")
		("max_length_diff_split,Z", po::value<int>(&max_length_diff_split_split)->default_value(5), "Maximum allowed length difference between two split read calls.")
		("split_border_distance,b", po::value<int>(&split_read_border_size)->default_value(12), "Minimum distance of variant breakpoint to start/end of alignment in order to use read.")
		("bam_window,w", po::value<int>(&bam_window_size)->default_value(1000), "Number of nucleotides to look to the right/to the left of deletions in BAM files to find relevant alignments.")
		("denovo_threshold,d", po::value<double>(&denovo_threshold)->default_value(1e-5), "Threshold for de novo calls (in trio/quartet mode)")
		("mapq,m", po::value<int>(&min_mapq)->default_value(0), "Minimum MAPQ. Alignments with lower MAPQ are ignored.")
		("variant_prior,p", po::value<double>(&variant_prior)->default_value(0.5), "Prior believe in a variant (given that the locus is under investigation).")
		("gt_only_insert,I", po::value<bool>(&genotyping_only_inserts)->zero_tokens(), "Do genotyping only based on internal-segment-type alignments.")
		("gt_only_split,S", po::value<bool>(&genotyping_only_splits)->zero_tokens(), "Do genotyping only based on split-type alignments.")
		("min_gq", po::value<int>(&min_gq)->default_value(20), "Minimum genotype quality (GQ). If GQ is below threshold, then it is not reported and \"./.\" is output instead. However, if distinction absent vs. present is possible, then \"1/.\" is reported, see option --gq-present.")
		("min_phys_cov", po::value<int>(&min_phys_cov)->default_value(5), "Minimum physical coverage to determine genotype if less, then \"./.\" is output for this deletion/individual (default=5).")
		("gq-present", po::value<int>(&gq_present_threshold)->default_value(20), "In case genotype cannot be called (due to bad GQ or due to insufficient physical coverage), then report genotype \"1/.\" (i.e. \"present\") if phred score of genotype \"0/0\" is above this threshold.")
		("omit-absent", po::value<bool>(&omit_absent)->zero_tokens(), "Omit lines where variant is absent in all samples.")
		("dont-prioritize", po::value<bool>(&dont_prioritize)->zero_tokens(), "By default indels with most split read support genotyped first, giving them precedence over indels with less support. This option disables this and genotypes indels as they appear in the input.")
		("max-output-length", po::value<int>(&max_del_output_length)->default_value(9999), "Maximum length of deletion for which the REF allele is output to the VCF. Above that, the <DEL> notation is used (defaul: 9999).")
	;

	if (argc<5) {
		usage(argv[0], options_desc);
	}
	string dataset_list_filename(argv[argc-4]);
	string readgroup_list_filename(argv[argc-3]);
	string reference_filename(argv[argc-2]);
	string deletion_filename(argv[argc-1]);
	argc -= 4;

	po::variables_map options;
	try {
		po::store(po::parse_command_line(argc, argv, options_desc), options);
		po::notify(options);
	} catch(std::exception& e) {
		cerr << "error: " << e.what() << "\n";
		return 1;
	}

//	typedef boost::unordered_map<Variation, variation_stats_t*> split_variation_map_t;
//	split_variation_map_t split_variation_map;
	vector<dataset_t> datasets;
	dataset_name_map_t dataset_name_map;

	family_information_t family_information;
	try {
		// Read information on datasets
		read_dataset_list(dataset_list_filename, &datasets);
		for (size_t i=0; i<datasets.size(); ++i) {
			dataset_t& d = datasets[i];
			dataset_name_map_t::const_iterator name_it = dataset_name_map.find(d.name);
			if (name_it != dataset_name_map.end()) {
				cerr << "Error: duplicate dataset name: \"" << d.name << "\"." << endl;
				return 1;
			}
			dataset_name_map[d.name] = i;
		}
		// Determine whether to do family-aware genotyping
		family_information = family_information_t(datasets);
		cerr << "family information: " << family_information << endl;
	} catch(std::exception& e) {
		cerr << "error: " << e.what() << "\n";
		return 1;
	}
	// Read readgroup-specific insert size distributions
	read_readgroup_list(readgroup_list_filename, &datasets, dataset_name_map);
	for (size_t i=0; i<datasets.size(); ++i) {
		cerr << datasets[i] << endl;
	}
	// Create dataset-specific genotyper objects
	for (size_t i=0; i<datasets.size(); ++i) {
		dataset_t& d = datasets[i];
		d.genotyper = new Genotyper(&d.readgroup_params, variant_prior, max_length_diff_split_split, max_distance_split_split, min_mapq, bam_window_size, !genotyping_only_splits, !genotyping_only_inserts, split_read_border_size);
	}
	ifstream input_variations_is(deletion_filename.c_str());
	if (input_variations_is.fail()) {
		cerr << "Error: could not open \"" << deletion_filename << "\"." << endl;
		return 1;
	}

	auto_ptr<vector<Variation> > input_variations = VariationListParser::parse(input_variations_is, false, -1, 0);
	cerr << "Read " << input_variations->size() << " variants to be genotyped." << endl;
	
	if (!dont_prioritize) {
		// Do one pass over all BAM files to find supporting reads and genotype in the order of most support
		cerr << "Looking for supporting split reads to determine order in which to genotype variants" << endl;
		boost::unordered_map<Variation,int> variant_support_counts;
		for (size_t i=0; i<input_variations->size(); ++i) {
			variant_support_counts[input_variations->at(i)] = 0;
		}
		for (size_t i=0; i<datasets.size(); ++i) {
			dataset_t& d = datasets[i];
			BamHelper::findSupportingReads(*(d.bam_reader), bam_window_size, false, &variant_support_counts, min_mapq);
		}
		cerr << "Sorting variants by (exact) split read support." << endl;
		sort(input_variations->begin(), input_variations->end(), variation_comparator_t(variant_support_counts));
	}

	// load reference genome
	auto_ptr<FastaReader::reference_map_t> reference_map = FastaReader::parseFromFile(reference_filename);
	cerr << "Read " << reference_map->size() << " reference sequences." << endl;
	clock_t clock_start = clock();
	// output header
	cout << "##fileformat=VCFv4.1" << endl;
	cout << "##fileDate=" << get_date_string() << endl;
	cout << "##source=genotyper-" << VersionInfo::version() << " cmdline: " << commandline << endl;
	cout << "##reference=" << reference_filename << endl;
	cout << "##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Difference in length between REF and ALT alleles\">" << endl;
	cout << "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">" << endl;
	cout << "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">" << endl;
	cout << "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype quality: phred scaled probability that the genotype is wrong.\">" << endl;
	cout << "##FORMAT=<ID=PL,Number=.,Type=Integer,Description=\"Comma-separated phred-scaled genotype likelihoods for absent, heterozyguous, homozyguous. These probabilities are BEFORE using any priors or family-information (if any).\">" << endl;
	cout << "##FORMAT=<ID=AD,Number=.,Type=Integer,Description=\"Total allele depth (ie IE plus SE) in two comma-separated values: REF support, ALT support\">" << endl;
	cout << "##FORMAT=<ID=IE,Number=.,Type=Integer,Description=\"Internal segment (ie read pair) evidence in two comma-separated values: REF support, ALT support\">" << endl;
	cout << "##FORMAT=<ID=SE,Number=.,Type=Integer,Description=\"Split-read evidence in two comma-separated values: REF support, ALT support\">" << endl;
	cout << "##FORMAT=<ID=PP,Number=.,Type=Integer,Description=\"Same as PL, but AFTER using prior probabilities and family information.\">" << endl;
	cout << "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT";
	for (size_t dataset_idx=0; dataset_idx<datasets.size(); ++dataset_idx) {
		cout << '\t' << datasets[dataset_idx].name;
	}
	cout << endl;

	OverlappingRegions masked_regions;
	// iterate over all variations to be genotyped
	for (size_t i=0; i<input_variations->size(); ++i) {
		const Variation& v = input_variations->at(i);
		if ((v.getType() != Variation::DELETION) && (v.getType() != Variation::INSERTION)) {
			cerr << "Warning: found unknown variation type in line " << (i+1) << endl;
			continue;
		}
		// create vector with records to be output per dataset
		vector<output_stats_t> output_stats(datasets.size(), output_stats_t());
		// if bam files are present determine coverage etc.
		bool skip = false;
		for (size_t dataset_idx=0; dataset_idx<datasets.size(); ++dataset_idx) {
			dataset_t& d = datasets[dataset_idx];
			assert(d.bam_reader != 0);
			int ref_id = d.bam_reader->GetReferenceID(v.getChromosome());
			if (ref_id < 0) {
				cerr << "Error: reference " << v.getChromosome() << " not found in BAM file." << endl;
				return 1;
			}
			assert(d.genotyper != 0);
			auto_ptr<Genotyper::variation_stats_t> genotyper_stats =  d.genotyper->extractVariationStats(v, *(d.bam_reader),  &masked_regions, &(d.used_reads));
			if (genotyper_stats.get() == 0) {
				skip = true;
				break;
			}
			// Compute per individual genotype distribution (trio/quartet-aware correction will be done later)
			output_stats_t& o = output_stats[dataset_idx];
			o.genotyper_stats = *genotyper_stats;
// 			cerr << "Genotyping deletion at " << v.getChromosome() << ' ' <<  v.getCoordinate1() - 1 << endl;
// 			cerr << "  insert size (REF/ALT): " << o.genotyper_stats.insert_evidence.ref_support << "/" << o.genotyper_stats.insert_evidence.alt_support << endl;
// 			cerr << "  split reads (REF/ALT):" << o.genotyper_stats.split_evidence.ref_support << "/" << o.genotyper_stats.split_evidence.alt_support << endl;
			auto_ptr<GenotypeDistribution> genotype_distribution = d.genotyper->computeGenotype(v, o.genotyper_stats, &o.raw_genotype_distribution);
			assert(genotype_distribution.get() != 0);
			o.genotype_distribution = *genotype_distribution;
			o.genotype = genotype_distribution->likeliestGenotype();
		}
		if (skip) {
			cerr << "Warning: skipping variation (either coverage too high or reference unknown)." << endl;
			continue;
		}
		// if trio/quartet-aware, then re-genotype all three/four samples
		if (family_information.present) {
			auto_ptr<Genotyper::trio_genotype_t> trio_genotype(0);
			auto_ptr<Genotyper::quartet_genotype_t> quartet_genotype(0); 
			switch (family_information.twin_type) {
				case NO_TWINS:
					// trio case
					trio_genotype = Genotyper::genotypeTrio(
						output_stats[family_information.mother_idx].genotype_distribution,
						output_stats[family_information.father_idx].genotype_distribution,
						output_stats[family_information.child0_idx].genotype_distribution,
						denovo_threshold
					);
					output_stats[family_information.mother_idx].genotype = trio_genotype->mother;
					output_stats[family_information.father_idx].genotype = trio_genotype->father;
					output_stats[family_information.child0_idx].genotype = trio_genotype->child;
					output_stats[family_information.mother_idx].genotype_distribution = trio_genotype->mother_posterior;
					output_stats[family_information.father_idx].genotype_distribution = trio_genotype->father_posterior;
					output_stats[family_information.child0_idx].genotype_distribution = trio_genotype->child_posterior;
					break;
				case MONOZYGOTIC:
				case DIZYGOTIC:
					// quartet case
					quartet_genotype = Genotyper::genotypeQuartet(
						output_stats[family_information.mother_idx].genotype_distribution,
						output_stats[family_information.father_idx].genotype_distribution,
						output_stats[family_information.child0_idx].genotype_distribution,
						output_stats[family_information.child1_idx].genotype_distribution,
						denovo_threshold,
						family_information.twin_type == MONOZYGOTIC
					);
					output_stats[family_information.mother_idx].genotype = quartet_genotype->mother;
					output_stats[family_information.father_idx].genotype = quartet_genotype->father;
					output_stats[family_information.child0_idx].genotype = quartet_genotype->child1;
					output_stats[family_information.child1_idx].genotype = quartet_genotype->child2;
					output_stats[family_information.mother_idx].genotype_distribution = quartet_genotype->mother_posterior;
					output_stats[family_information.father_idx].genotype_distribution = quartet_genotype->father_posterior;
					output_stats[family_information.child0_idx].genotype_distribution = quartet_genotype->child1_posterior;
					output_stats[family_information.child1_idx].genotype_distribution = quartet_genotype->child2_posterior;
					break;
				default:
					assert(false);
			}
		}
		FastaReader::reference_map_t::const_iterator ref = reference_map->find(v.getChromosome());
		if (ref == reference_map->end()) {
			cerr << "Unknown reference sequence: " << v.getChromosome() << endl;
			return 1;
		}
		ostringstream line;
		line << v.getChromosome(); // CHROM
		line << '\t' << v.getCoordinate1(); // POS
		line << '\t' << "."; // ID
		if (v.getType() == Variation::DELETION) {
			int length = v.getCoordinate2()-v.getCoordinate1();
			if (length > max_del_output_length) {
				line << '\t' << 'N'; // REF
				line << '\t' << "<DEL>"; // ALT
			} else {
				line << '\t' << ref->second->substr(v.getCoordinate1()-1, length + 1); // REF
				line << '\t' << (*ref->second)[v.getCoordinate1()-1]; // ALT
			}
			line << '\t' << "."; // QUAL
			line << '\t' << "PASS"; // FILTER
			line << "\tSVTYPE=DEL;SVLEN=" << -length; // INFO
		} else if (v.getType() == Variation::INSERTION) {
			int length = v.getCoordinate2();
			line << '\t' << (*ref->second)[v.getCoordinate1()-1]; // REF
			line << '\t' << (*ref->second)[v.getCoordinate1()-1] << v.getSequence(); // ALT
			line << '\t' << "."; // QUAL
			line << '\t' << "PASS"; // FILTER
			line << "\tSVTYPE=INS;SVLEN=" << length; // INFO
			
		} else {
			assert(false);
		}
		line << '\t' << "GT:GQ:PL:AD:IE:SE:PP"; // FORMAT
		// number of samples where the ALT genotype is present (i.e. either heterozyguous or homozyguous)
		int present_count = 0;
		for (size_t dataset_idx=0; dataset_idx<datasets.size(); ++dataset_idx) {
			output_stats_t& o = output_stats[dataset_idx];
			int ie_ref = o.genotyper_stats.insert_evidence.ref_support;
			int ie_alt = o.genotyper_stats.insert_evidence.alt_support;
			int se_ref = o.genotyper_stats.split_evidence.ref_support;
			int se_alt = o.genotyper_stats.split_evidence.alt_support;
			int gq = phred(o.genotype_distribution.errorProbability(o.genotype));
			int physical_coverage = o.genotyper_stats.split_evidence.coverage() + o.genotyper_stats.insert_evidence.coverage();
			int not_present_score = phred(o.genotype_distribution.notPresent());
			string genotype = "./.";
			if ((gq>=min_gq) && (physical_coverage>=min_phys_cov)) {
				genotype = GenotypeDistribution::genotypeToString(o.genotype);
				if (o.genotype != GenotypeDistribution::ABSENT) {
					present_count += 1;
				}
			} else if (not_present_score >= gq_present_threshold) {
				gq = not_present_score;
				genotype = "1/.";
				present_count += 1;
			}
			line << '\t' << genotype
				<< ":" << gq
				<< ":" << o.raw_genotype_distribution.toPLString()
				<< ":" << ie_ref+se_ref << ',' << ie_alt+se_alt
				<< ":" << ie_ref << ',' << ie_alt
				<< ":" << se_ref << ',' << se_alt
				<< ":" << o.genotype_distribution.toPLString();
		}
		if (!(omit_absent && (present_count == 0))) {
			cout << line.str() << endl; 
		}
	}
	double cpu_time = (double)(clock() - clock_start) / CLOCKS_PER_SEC;
	cerr << "Total CPU time: " << cpu_time << " (excluding preprocessing such as reading input files)" << endl;
	return 0;
}
